{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "506fab2a-a50c-42bd-a106-c83a9d2828ea",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "--2024-06-13 13:53:24--  https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py\n",
      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.108.133, ...\n",
      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 3832 (3.7K) [text/plain]\n",
      "Saving to: 'minsearch.py'\n",
      "\n",
      "     0K ...                                                   100%  579K=0.006s\n",
      "\n",
      "2024-06-13 13:53:24 (579 KB/s) - 'minsearch.py' saved [3832/3832]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!rm -f minsearch.py\n",
    "!wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "3ac947de-effd-4b61-8792-a6d7a133f347",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<minsearch.Index at 0x1d9c9bd8890>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import requests \n",
    "import minsearch\n",
    "\n",
    "docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'\n",
    "docs_response = requests.get(docs_url)\n",
    "documents_raw = docs_response.json()\n",
    "\n",
    "documents = []\n",
    "\n",
    "for course in documents_raw:\n",
    "    course_name = course['course']\n",
    "\n",
    "    for doc in course['documents']:\n",
    "        doc['course'] = course_name\n",
    "        documents.append(doc)\n",
    "\n",
    "index = minsearch.Index(\n",
    "    text_fields=[\"question\", \"text\", \"section\"],\n",
    "    keyword_fields=[\"course\"]\n",
    ")\n",
    "\n",
    "index.fit(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8f087272-b44d-4738-9ea2-175ec63a058b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def search(query):\n",
    "    boost = {'question': 3.0, 'section': 0.5}\n",
    "\n",
    "    results = index.search(\n",
    "        query=query,\n",
    "        filter_dict={'course': 'data-engineering-zoomcamp'},\n",
    "        boost_dict=boost,\n",
    "        num_results=5\n",
    "    )\n",
    "\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "742ab881-499a-4675-83c4-2013ea1377b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_prompt(query, search_results):\n",
    "    prompt_template = \"\"\"\n",
    "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n",
    "Use only the facts from the CONTEXT when answering the QUESTION.\n",
    "\n",
    "QUESTION: {question}\n",
    "\n",
    "CONTEXT: \n",
    "{context}\n",
    "\"\"\".strip()\n",
    "\n",
    "    context = \"\"\n",
    "    \n",
    "    for doc in search_results:\n",
    "        context = context + f\"section: {doc['section']}\\nquestion: {doc['question']}\\nanswer: {doc['text']}\\n\\n\"\n",
    "    \n",
    "    prompt = prompt_template.format(question=query, context=context).strip()\n",
    "    return prompt\n",
    "\n",
    "def llm(prompt):\n",
    "    response = client.chat.completions.create(\n",
    "        model='phi3',\n",
    "        messages=[{\"role\": \"user\", \"content\": prompt}]\n",
    "    )\n",
    "    \n",
    "    return response.choices[0].message.content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "fe8bff3e-b672-42be-866b-f2d9bb217106",
   "metadata": {},
   "outputs": [],
   "source": [
    "def rag(query):\n",
    "    search_results = search(query)\n",
    "    prompt = build_prompt(query, search_results)\n",
    "    answer = llm(prompt)\n",
    "    return answer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "091a77e6-936b-448e-a04b-bad1001f5bb0",
   "metadata": {},
   "outputs": [],
   "source": [
    "from openai import OpenAI\n",
    "\n",
    "client = OpenAI(\n",
    "    base_url='http://localhost:11434/v1/',\n",
    "    api_key='ollama',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "9ee527a3-3331-4f4e-b6c8-f659ffc113f5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "' This statement serves as an example to verify the functionality of various systems, such as text processing software or programming functions. It\\'s commonly used by developers during debugging sessions to ensure commands are working correctly without producing any unintended output.\\n\\nHere\\'s how you might include it in different contexts:\\n\\n**1. Using it as a command line test in a script:**\\nIf writing a shell script or using command-line tools, the statement can be inserted directly to demonstrate functionality. For instance, using `echo` on Unix-like systems:\\n```bash\\n#!/bin/bash\\necho \"This is a test\"\\necho \"This is also a test for confirmation.\"\\n```\\n\\n**2. Inserting it into a programming function as a placeholder or comment (in Python):**\\nAs a comment in code to remind future developers that the block can be replaced with actual implementation:\\n```python\\ndef process_text(input_string):\\n    # Test input: \"This is a test\"\\n    print(\"Testing...\")\\n    # Replace this line with your processing logic\\n    return input_string.upper()  # Example operation\\n```\\n\\n**3. Using in documentation or comments within software development code:**\\nDemonstrate how the statement can be used to clarify intentions when developing software, such as in a README file or inline comment:\\n```markdown\\n# Test Command Functionality\\nThis section contains commands that serve to test system functionality.\\n`echo \"This is a test\"` - A simple command to check output behavior.\\n```\\n\\nIn each case, the statement `This is a test` fulfills its role as a straightforward demonstration or placeholder within development and testing workflows.'"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "llm('write that this is a test')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "21aa255e-c971-44ca-9826-a721df3ad063",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " This statement serves as an example to verify the functionality of various systems, such as text processing software or programming functions. It's commonly used by developers during debugging sessions to ensure commands are working correctly without producing any unintended output.\n",
      "\n",
      "Here's how you might include it in different contexts:\n",
      "\n",
      "**1. Using it as a command line test in a script:**\n",
      "If writing a shell script or using command-line tools, the statement can be inserted directly to demonstrate functionality. For instance, using `echo` on Unix-like systems:\n",
      "```bash\n",
      "#!/bin/bash\n",
      "echo \"This is a test\"\n",
      "echo \"This is also a test for confirmation.\"\n",
      "```\n",
      "\n",
      "**2. Inserting it into a programming function as a placeholder or comment (in Python):**\n",
      "As a comment in code to remind future developers that the block can be replaced with actual implementation:\n",
      "```python\n",
      "def process_text(input_string):\n",
      "    # Test input: \"This is a test\"\n",
      "    print(\"Testing...\")\n",
      "    # Replace this line with your processing logic\n",
      "    return input_string.upper()  # Example operation\n",
      "```\n",
      "\n",
      "**3. Using in documentation or comments within software development code:**\n",
      "Demonstrate how the statement can be used to clarify intentions when developing software, such as in a README file or inline comment:\n",
      "```markdown\n",
      "# Test Command Functionality\n",
      "This section contains commands that serve to test system functionality.\n",
      "`echo \"This is a test\"` - A simple command to check output behavior.\n",
      "```\n",
      "\n",
      "In each case, the statement `This is a test` fulfills its role as a straightforward demonstration or placeholder within development and testing workflows.\n"
     ]
    }
   ],
   "source": [
    "print(_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "988ece59-951a-4b32-ba3f-cb8efb66a9bb",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
